Dimensionality Reduction of TCGA and GTEx Samples Via TriMap

Dimensionality reductions of several different tissue/subtype groupings for samples in The Cancer Genome Atlas (TCGA) [The Cancer Genome Atlas Research Network, Weinstein et al. 2013] and the Genotype-Tissue Expression (GTEx) Consortium [GTEx Consortium et al. 2015], computed using TriMap [Amid et al. 2018], a method that attempts to preserve more global structure and Euclidean distance than methods like t-SNE.

In [15]:
import os
import pickle
import pandas as pd
import numpy as np
import rnaseq_lib as r
import holoviews as hv
hv.extension('bokeh', logo=False)

# Read in data - Synapse ID: syn12009613
# NOTE(review): absolute, machine-specific path; this cell only runs where the file exists.
data_path = '/mnt/data/Objects/tcga_gtex_data.hd5'
# The same HDF5 file is read twice, once per key ('exp' and 'met');
# presumably expression values and sample metadata -- confirm against the Synapse entry.
exp = pd.read_hdf(data_path, key='exp')
met = pd.read_hdf(data_path, key='met')

# Cast expression data as float64 which is required for TriMap
exp = exp.astype(np.float64)

# Construct combined dataframe and Holoview wrapper
df = r.data.add_metadata_to_exp(exp, met)
h = r.plot.Holoview(df)

# Color scheme for sample counts
# NOTE(review): `colors` is not referenced in the cells visible here -- confirm it is used later.
colors = ['green', 'blue', 'yellow', 'red']

# Truncate type names to at most 20 characters
# NOTE(review): this assignment runs after `h` was built from `df`; whether `h` sees the
# truncated names depends on whether Holoview keeps a reference to `df` or a copy -- confirm.
df.type = [x[:20] for x in df.type]
In [ ]:
# Global Scatter options, keyed by element name and then by option group
opts = {
    'Scatter': {
        'plot': {'color_index': 'type', 'width': 750},
        'style': {'size': 5, 'alpha': 0.25, 'hover_alpha': 0.75, 'cmap': 'Set1'},
    }
}

# Tissue comparison sets: each inner list is handed to h.trimap as one `tissue_subset`
tissues = [
    ['Adrenal'],
    ['Bile', 'Liver'],
    ['Bladder', 'Kidney'],
    ['Bone_marrow', 'Whole', 'Cells'],
    ['Brain'],
    ['Breast'],
    ['Cervix', 'Ovary', 'Uterus', 'Vagina'],
    ['Colon', 'Small_intestine', 'Stomach', 'Esophagus'],
    ['Eye', 'Brain'],
    ['Head', 'Skin', 'Minor'],
    ['Kidney'],
    ['Liver'],
    ['Lung', 'Pleura'],
    ['Lymph', 'Cells', 'Whole'],
    ['Pancreas'],
    ['Prostate'],
    ['Skin'],
    ['Soft_tissue', 'Muscle', 'Adipose'],
    ['Stomach'],
    ['Testis'],
    ['Thyroid'],
    ['Uterus'],
]

# One TriMap plot per comparison set, keyed by the '_'-joined tissue names,
# with the Scatter options above applied to each plot
hmap = dict(
    ('_'.join(group), h.trimap(h.genes, tissue_subset=group).opts(opts))
    for group in tissues
)

# Wrap the per-set plots in a HoloMap with a single 'Tissue(s)' key dimension
hmap_plot = hv.HoloMap(hmap, kdims='Tissue(s)')
In [19]:
%%opts Scatter [width=725 height=500 legend_position='left' color_index='type'] 
%%opts Scatter [finalize_hooks=[r.plot.disable_logo]]
%%opts Scatter (size=5 alpha=0.25 hover_alpha=0.75 cmap='Set1')
%%opts Scatter {+axiswise +framewise}
# Display the HoloMap built in the previous cell; the %%opts lines above set Scatter
# options for this output.
# NOTE(review): width=725 here differs from width=750 in the `opts` dict applied when
# `hmap` was built -- confirm which value is intended.
hmap_plot
Out[19]: